Air temperature (C), Wind Speed (m/s), Wind Direction (degrees from; 0=from N, 90=from E, etc.)

Daily Data

df = read.csv(file="all_days.csv",header=T)
head(df)
##   X DOY    WS  WD    AT   n
## 1 1   1 11.89 234  1.34 288
## 2 2   2  9.66 250 -8.07 288
## 3 3   3  8.11 214 -2.25 288
## 4 4   4  7.93 257 -2.40 288
## 5 5   5  5.41 236 -6.81 288
## 6 6   6  7.74 288 -2.97 288

Monthly Data

df.agg = read.csv(file="agg.csv",header=T)
head(df.agg)
##   X Year Month    m_ws     m_at     m_wd Order
## 1 1 2011     1 7.18194 -5.02968 238.6452     1
## 2 2 2011     2 8.33893 -2.38464 192.1786     2
## 3 3 2011     3 6.96677  1.91097 138.0000     3
## 4 4 2011     4 7.97367  7.60433 176.9667     4
## 5 5 2011     5 7.75290 12.23323 188.7419     5
## 6 6 2011     6 6.23533 18.66533 200.8333     6

Daily

windrose(speed = df$WS,
                 direction =df$WD,
                 speed_cuts = seq(0,10,2),
                 ggtheme='minimal')

Monthly

windrose(speed = df.agg$m_ws,
                 direction =df.agg$m_wd,
                 speed_cuts = seq(0,10,2),
                 ggtheme='minimal')

Monthly

df1 <- ts(df.agg$m_ws,start=c(2011,1),frequency=12)
head(df1)
##          Jan     Feb     Mar     Apr     May     Jun
## 2011 7.18194 8.33893 6.96677 7.97367 7.75290 6.23533
autoplot(df1,ylab="Monthly Wind Speed")

Daily

df0 <- ts(df$WS,start=c(2011,1,1),frequency=365)
head(df0)
## Time Series:
## Start = c(2011, 1) 
## End = c(2011, 6) 
## Frequency = 365 
## [1] 11.89  9.66  8.11  7.93  5.41  7.74
autoplot(df0,ylab="Daily Wind Speed")

train_data = window(df1,start=c(2011,1),end=c(2019,12))
test_data = window(df1,start=c(2020,1),end=c(2021,12))
autoplot(train_data)

kpss.test(train_data)
## Warning in kpss.test(train_data): p-value greater than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  train_data
## KPSS Level = 0.10721, Truncation lag parameter = 4, p-value = 0.1
lambda_train<-BoxCox.lambda(train_data)
t_train<-BoxCox(train_data,lambda_train)
t_test<-BoxCox(test_data,lambda_train)
plot(t_train)

windtimeseriescomponents <- decompose(train_data)
plot(windtimeseriescomponents)

tsdisplay(train_data)

Regression model

regression.model<-tslm(t_train~trend+season)
summary(regression.model)
## 
## Call:
## tslm(formula = t_train ~ trend + season)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0310234 -0.0059667  0.0007706  0.0065515  0.0219059 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.782e-01  4.006e-03 219.237  < 2e-16 ***
## trend       -8.025e-05  3.395e-05  -2.364   0.0201 *  
## season2     -1.427e-03  5.153e-03  -0.277   0.7824    
## season3     -9.563e-03  5.154e-03  -1.856   0.0666 .  
## season4     -1.118e-02  5.154e-03  -2.170   0.0325 *  
## season5     -2.849e-02  5.155e-03  -5.527 2.84e-07 ***
## season6     -4.336e-02  5.156e-03  -8.410 4.10e-13 ***
## season7     -5.910e-02  5.157e-03 -11.459  < 2e-16 ***
## season8     -4.923e-02  5.159e-03  -9.544 1.56e-15 ***
## season9     -2.368e-02  5.160e-03  -4.589 1.36e-05 ***
## season10    -2.164e-03  5.162e-03  -0.419   0.6760    
## season11     3.709e-03  5.164e-03   0.718   0.4744    
## season12    -2.463e-03  5.167e-03  -0.477   0.6347    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01093 on 95 degrees of freedom
## Multiple R-squared:  0.8048, Adjusted R-squared:  0.7801 
## F-statistic: 32.64 on 12 and 95 DF,  p-value: < 2.2e-16
regression.preditct<-forecast(regression.model,h=24)
autoplot(regression.preditct)

autoplot(test_data) +
  autolayer(InvBoxCox(regression.preditct$mean, lambda_train), series="Regression") +
  ggtitle("Forecast Wind Speed for 2020-2021.") +
  xlab("Year") + ylab("Speed")

checkresiduals(regression.model)

## 
##  Breusch-Godfrey test for serial correlation of order up to 22
## 
## data:  Residuals from Linear regression model
## LM test = 23.672, df = 22, p-value = 0.3646
accuracy(InvBoxCox(regression.preditct$mean, lambda_train), test_data)
##                 ME      RMSE       MAE      MPE     MAPE        ACF1 Theil's U
## Test set 0.1453971 0.5838959 0.5281564 1.569681 7.961352 -0.09136558 0.6072062

sARIMA model

arima.model<-auto.arima(train_data,seasonal=TRUE,lambda = 'auto')
arima.model
## Series: train_data 
## ARIMA(0,0,0)(0,1,1)[12] with drift 
## Box Cox transformation: lambda= -0.8999268 
## 
## Coefficients:
##          sma1   drift
##       -0.8078  -1e-04
## s.e.   0.1385   1e-04
## 
## sigma^2 = 0.0002048:  log likelihood = 266.3
## AIC=-526.59   AICc=-526.33   BIC=-518.9
arima.preditct<-forecast(arima.model,h=24)
autoplot(arima.preditct)

autoplot(arima.preditct) + autolayer(test_data) + ggtitle("Seasonal ARIMA Model Predict")

autoplot(test_data) +
  autolayer(arima.preditct$mean, series="Seasonal ARIMA model") +
  ggtitle("Forecast Wind Speed for 2020-2021.") +
  xlab("Year") + ylab("Speed")

checkresiduals(arima.model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,0,0)(0,1,1)[12] with drift
## Q* = 21.637, df = 20, p-value = 0.3605
## 
## Model df: 2.   Total lags used: 22
accuracy(arima.preditct$mean, test_data)
##                 ME      RMSE      MAE      MPE     MAPE       ACF1 Theil's U
## Test set 0.1503864 0.5651745 0.502009 1.652634 7.523286 -0.1641713 0.5814483

Regression with ARIMA Errors

agg.ws = ts(df.agg$m_ws,start=c(2011,1),end = c(2021,12),frequency=12)
agg.at = ts(df.agg$m_at,start=c(2011,1),end = c(2021,12),frequency=12)
agg.wd = ts(df.agg$m_wd,start=c(2011,1),end = c(2021,12),frequency=12)
train.agg.ws = window(agg.ws,start=c(2011,1),end=c(2019,12))
test.agg.ws = window(agg.ws,start=c(2020,1),end=c(2021,12))
train.agg.at = window(agg.at,start=c(2011,1),end=c(2019,12))
test.agg.at = window(agg.at,start=c(2020,1),end=c(2021,12))
train.agg.wd = window(agg.wd,start=c(2011,1),end=c(2019,12))
test.agg.wd = window(agg.wd,start=c(2020,1),end=c(2021,12))
reg_arima.model <- auto.arima(train.agg.ws, xreg = train.agg.at + train.agg.wd,lambda = 'auto')
reg_arima.model
## Series: train.agg.ws 
## Regression with ARIMA(0,0,0)(0,1,1)[12] errors 
## Box Cox transformation: lambda= -0.8999268 
## 
## Coefficients:
##          sma1   drift   xreg
##       -0.8009  -1e-04  1e-04
## s.e.   0.1344   1e-04  1e-04
## 
## sigma^2 = 0.0002048:  log likelihood = 266.98
## AIC=-525.96   AICc=-525.52   BIC=-515.7
checkresiduals(reg_arima.model)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(0,0,0)(0,1,1)[12] errors
## Q* = 21.397, df = 19, p-value = 0.3153
## 
## Model df: 3.   Total lags used: 22
reg_arima.predict = forecast(reg_arima.model,xreg = test.agg.at + test.agg.wd, h=24)
autoplot(reg_arima.predict)

autoplot(reg_arima.predict) + autolayer(test_data) + ggtitle("Regression with ARIMA errors model Predict")

autoplot(test_data) +
  autolayer(reg_arima.predict$mean, series="Regression with ARIMA errors") +
  ggtitle("Forecast Wind Speed for 2020-2021.") +
  xlab("Year") + ylab("Speed")

accuracy(reg_arima.predict$mean, test_data)
##                 ME      RMSE       MAE      MPE     MAPE       ACF1 Theil's U
## Test set 0.1942033 0.5405681 0.4770955 2.354906 7.080487 -0.1105337 0.5644737

Exponential Smoothing

ets.model = ets(train_data, lambda = "auto")
summary(ets.model)
## ETS(A,N,A) 
## 
## Call:
##  ets(y = train_data, lambda = "auto") 
## 
##   Box-Cox transformation: lambda= -0.8999 
## 
##   Smoothing parameters:
##     alpha = 0.0373 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 0.9186 
##     s = 0.0193 0.0282 0.02 -0.0063 -0.0365 -0.0485
##            -0.0299 -0.0123 0.0104 0.0115 0.0209 0.023
## 
##   sigma:  0.0137
## 
##       AIC      AICc       BIC 
## -405.6735 -400.4561 -365.4415 
## 
## Training set error measures:
##                       ME      RMSE       MAE        MPE     MAPE      MASE
## Training set -0.02124583 0.5490853 0.4221956 -0.8578455 5.916325 0.6966621
##                     ACF1
## Training set -0.05954436
checkresiduals(ets.model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,N,A)
## Q* = 25.068, df = 8, p-value = 0.001514
## 
## Model df: 14.   Total lags used: 22
ets.predict=forecast(ets.model,h=24)
autoplot(ets.predict)

autoplot(ets.predict) + autolayer(test_data) + ggtitle("ETS model Predict")

autoplot(test_data) +
  autolayer(ets.predict$mean, series="ETS Model") +
  ggtitle("Forecast Wind Speed for 2020-2021.") +
  xlab("Year") + ylab("Speed")

accuracy(ets.predict$mean, test_data)
##                   ME      RMSE       MAE      MPE     MAPE        ACF1
## Test set 0.001845533 0.5664101 0.4905484 -0.47117 7.525161 -0.08083932
##          Theil's U
## Test set 0.5704779

VAR Model

var.model <- VAR(cbind(t_train, train.agg.at),p = 1, type = "both",season="12")
coef(var.model)
## $t_train
##                      Estimate   Std. Error    t value     Pr(>|t|)
## t_train.l1       1.945798e-02 1.016407e-01  0.1914388 8.486037e-01
## train.agg.at.l1  6.664054e-04 4.609051e-04  1.4458627 1.516138e-01
## const            8.365441e-01 8.755570e-02  9.5544218 1.976393e-15
## trend           -8.906215e-05 3.505084e-05 -2.5409417 1.272979e-02
## sd1              8.043313e-03 5.786005e-03  1.3901323 1.678430e-01
## sd2              6.146996e-03 6.235105e-03  0.9858689 3.267831e-01
## sd3             -2.849053e-03 5.913634e-03 -0.4817770 6.311085e-01
## sd4             -7.796857e-03 5.307221e-03 -1.4691035 1.452160e-01
## sd5             -2.826709e-02 5.584224e-03 -5.0619543 2.118647e-06
## sd6             -4.691641e-02 7.562163e-03 -6.2040997 1.552763e-08
## sd7             -6.579023e-02 9.809546e-03 -6.7067559 1.583494e-09
## sd8             -5.794199e-02 1.176887e-02 -4.9233265 3.725991e-06
## sd9             -3.210988e-02 1.102789e-02 -2.9116989 4.509907e-03
## sd10            -8.642102e-03 8.722604e-03 -0.9907708 3.243962e-01
## sd11             1.631117e-03 6.051928e-03  0.2695203 7.881329e-01
## 
## $train.agg.at
##                      Estimate   Std. Error     t value     Pr(>|t|)
## t_train.l1      -1.695558e+01 21.093074289 -0.80384574 4.235582e-01
## train.agg.at.l1  3.938358e-01  0.095649710  4.11748051 8.350743e-05
## const            2.067765e+01 18.170069116  1.13800588 2.580730e-01
## trend           -1.102523e-04  0.007273955 -0.01515713 9.879396e-01
## sd1             -8.910908e-01  1.200745572 -0.74211456 4.599084e-01
## sd2              1.339412e+00  1.293945314  1.03513794 3.033178e-01
## sd3              6.029666e+00  1.227231776  4.91322551 3.881243e-06
## sd4              8.631057e+00  1.101385470  7.83654479 7.866543e-12
## sd5              1.290649e+01  1.158870664 11.13712807 9.595451e-19
## sd6              1.532741e+01  1.569344204  9.76676162 7.064525e-16
## sd7              1.653907e+01  2.035734120  8.12437451 1.983873e-12
## sd8              1.420674e+01  2.442344659  5.81684676 8.621108e-08
## sd9              1.098336e+01  2.288571275  4.79922033 6.134126e-06
## sd10             5.641507e+00  1.810165742  3.11656936 2.441266e-03
## sd11             1.877203e+00  1.255931303  1.49466984 1.384228e-01
var.predict <- predict(var.model, n.ahead = 24)
plot(var.predict)

p1<-ts(var.predict$fcst$t_train[,1],start=c(2020,1),end=c(2021,12),frequency=12)
autoplot(test_data) +
  autolayer(InvBoxCox(p1, lambda_train), series="VAR",) +
  ggtitle("Forecast Wind Speed for 2020-2021.") +
  xlab("Year") + ylab("Speed")

accuracy(InvBoxCox(var.predict$fcst$t_train[,1], lambda_train), test_data)
##                 ME      RMSE       MAE      MPE     MAPE       ACF1 Theil's U
## Test set 0.1534725 0.6000114 0.5376525 1.698566 8.092142 -0.0757317 0.6134944
checkresiduals(var.model$varresult$t_train)

## 
##  Breusch-Godfrey test for serial correlation of order up to 18
## 
## data:  Residuals
## LM test = 24.336, df = 18, p-value = 0.1443
serial.test(var.model, lags.pt=10, type="PT.asymptotic")
## 
##  Portmanteau Test (asymptotic)
## 
## data:  Residuals of VAR object var.model
## Chi-squared = 48.233, df = 36, p-value = 0.08358
plot(resid(var.model))

plot(resid(var.model), ylim=c(-10,10))
abline(0,1)